{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e180af3a-3a2c-4186-a577-7051ec6460b1",
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install -qU elasticsearch sentence-transformers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "63d22ea2-ecca-41bb-b08f-de8ad49cda41",
   "metadata": {},
   "outputs": [],
   "source": [
    "# get the Elasticsearch client\n",
    "from elasticsearch import Elasticsearch\n",
    "from getpass import getpass\n",
    "\n",
    "ELASTIC_CLOUD_ID = getpass(\"Elastic Cloud ID: \")\n",
    "ELASTIC_API_KEY = getpass(\"Elastic Api Key: \")\n",
    "\n",
    "client = Elasticsearch(\n",
    "    cloud_id=ELASTIC_CLOUD_ID,\n",
    "    api_key=ELASTIC_API_KEY,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b367acaa-90e6-43d0-b9ae-cf42a0e2c0f1",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "from urllib.request import urlopen\n",
    "from sentence_transformers import SentenceTransformer\n",
    "\n",
    "if NBTEST[\"notebook\"] in [\n",
    "    \"01-keyword-querying-filtering.ipynb\",\n",
    "    \"02-hybrid-search.ipynb\",\n",
    "    \"06-synonyms-api.ipynb\",\n",
    "]:\n",
    "    # these tests need book_index to exist ahead of time\n",
    "    client.indices.delete(index=\"book_index\", ignore_unavailable=True)\n",
    "\n",
    "    mappings = {\n",
    "        \"properties\": {\n",
    "            \"title_vector\": {\n",
    "                \"type\": \"dense_vector\",\n",
    "                \"dims\": 384,\n",
    "                \"index\": \"true\",\n",
    "                \"similarity\": \"cosine\",\n",
    "            }\n",
    "        }\n",
    "    }\n",
    "    client.indices.create(index=\"book_index\", mappings=mappings)\n",
    "\n",
    "    url = \"https://raw.githubusercontent.com/elastic/elasticsearch-labs/main/notebooks/search/data.json\"\n",
    "    response = urlopen(url)\n",
    "    books = json.loads(response.read())\n",
    "\n",
    "    model = SentenceTransformer(\"all-MiniLM-L6-v2\")\n",
    "    operations = []\n",
    "    for book in books:\n",
    "        operations.append({\"index\": {\"_index\": \"book_index\"}})\n",
    "        # Transforming the title into an embedding using the model\n",
    "        book[\"title_vector\"] = model.encode(book[\"title\"]).tolist()\n",
    "        operations.append(book)\n",
    "    client.bulk(index=\"book_index\", operations=operations, refresh=True)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
